library(DBI)
library(odbc)
conn <- dbConnect(odbc::odbc(), driver="PostgreSQL Unicode",
server="10.0.0.220", database="environment",
uid="postgres", pwd="env19", port=6432)
dbListTables(conn)
## [1] "ag_census" "arable_land"
## [3] "consumption" "fws_species_count"
## [5] "fws_species_year" "global_mean_sea_level"
## [7] "global_temperature" "groundwater"
## [9] "iucn_species_count" "ocean_data"
## [11] "ocean_data_vw" "plants_assessments"
## [13] "ppm_month" "ppm_week"
## [15] "sea_ice_extent" "us_co2_emissions"
## [17] "us_gdp" "us_renewable_energy"
## [19] "usa_population" "world_co2_emissions"
## [21] "world_gdp" "world_population"
dbDisconnect(conn)
library(rJava)
library(RJDBC)
drv <- JDBC("org.postgresql.Driver",
"/usr/lib/jvm/java-8-oracle/lib/postgresql-42.2.2.jar")
conn <- dbConnect(drv, "jdbc:postgresql://10.0.0.220:6432/environment", "postgres", "env19")
dbListTables(conn, schema="public")
## [1] "ag_census_pkey" "arable_land_pkey"
## [3] "co2_emissions_pkey" "consumption_pkey"
## [5] "fws_species_count_pkey" "fws_species_year_pkey"
## [7] "global_mean_sea_level_pkey" "global_temperature_pkey"
## [9] "groundwater_pkey" "iucn_species_count_pkey"
## [11] "ocean_data_pkey" "plants_assessments_pkey"
## [13] "ppm_month_pkey" "ppm_week_pkey"
## [15] "sea_ice_extent_pkey" "us_gdp_pkey"
## [17] "us_renewable_energy_pkey" "usa_population_pkey"
## [19] "world_co2_emissions_pkey" "world_gdp_pkey"
## [21] "world_population_pkey" "ag_census_id_seq"
## [23] "arable_land_id_seq" "co2_emissions_id_seq"
## [25] "consumption_id_seq" "fws_species_count_id_seq"
## [27] "fws_species_year_id_seq" "global_mean_sea_level_id_seq"
## [29] "global_temperature_id_seq" "groundwater_id_seq"
## [31] "iucn_species_count_id_seq" "ocean_data_id_seq"
## [33] "plants_assessments_id_seq" "ppm_month_id_seq"
## [35] "ppm_week_id_seq" "sea_ice_extent_id_seq"
## [37] "us_gdp_id_seq" "us_renewable_energy_id_seq"
## [39] "usa_population_id_seq" "world_co2_emissions_id_seq"
## [41] "world_gdp_id_seq" "world_population_id_seq"
## [43] "ag_census" "arable_land"
## [45] "consumption" "fws_species_count"
## [47] "fws_species_year" "global_mean_sea_level"
## [49] "global_temperature" "groundwater"
## [51] "iucn_species_count" "ocean_data"
## [53] "plants_assessments" "ppm_month"
## [55] "ppm_week" "sea_ice_extent"
## [57] "us_co2_emissions" "us_gdp"
## [59] "us_renewable_energy" "usa_population"
## [61] "world_co2_emissions" "world_gdp"
## [63] "world_population" "ocean_data_vw"
dbDisconnect(conn)
## [1] TRUE
library(RPostgreSQL)
conn <- dbConnect(RPostgreSQL::PostgreSQL(), host="10.0.0.220", dbname="environment",
user="postgres", password="env19", port=6432)
dbListTables(conn)
## [1] "ag_census" "arable_land"
## [3] "consumption" "fws_species_count"
## [5] "fws_species_year" "global_mean_sea_level"
## [7] "global_temperature" "groundwater"
## [9] "iucn_species_count" "ocean_data"
## [11] "plants_assessments" "ppm_month"
## [13] "ppm_week" "sea_ice_extent"
## [15] "us_gdp" "us_renewable_energy"
## [17] "usa_population" "world_co2_emissions"
## [19] "world_gdp" "world_population"
## [21] "us_co2_emissions"
CREATE TABLE global_temperature (
id SERIAL NOT NULL PRIMARY KEY,
year INT,
period VARCHAR(50),
global_mean NUMERIC(5,2)
);
COPY global_temperature (year, period, global_mean) FROM '/path/to/csv/global_temperature.csv' DELIMITER ',' CSV HEADER;
COMMENT ON TABLE global_temperature IS 'Source: NASA - Combined Land-Surface Air and Sea-Surface Water Temperature Anomalies (Land-Ocean Temperature Index, LOTI) (https://data.giss.nasa.gov/gistemp/)';
SELECT relname as table, obj_description(oid) as comment
FROM pg_class
WHERE relkind = 'r'
AND obj_description(oid) IS NOT NULL
ORDER BY relname
| table | comment |
|---|---|
| ag_census | U.S. Department of Agriculture, Census Data Query Tool (CDQT): https://www.nass.usda.gov/Quick_Stats/CDQT/chapter/1/table/1 |
| arable_land | Source: World Bank - Arable land (% of land area) (https://data.worldbank.org/indicator/AG.LND.ARBL.ZS?end=2016&start=1961&view=chart) |
| consumption | Source: EIA - Monthly Energy Review, publication of recent and historical energy statistics (https://www.eia.gov/totalenergy/data/monthly/) |
| fws_species_count | Source: US FWS - U.S. Federal Endangered and Threatened Species (https://ecos.fws.gov/ecp/species-reports) |
| fws_species_year | Source: US FWS - U.S. Federal Endangered and Threatened Species (https://ecos.fws.gov/ecp/species-reports) |
| global_mean_sea_level | Source: NASA - Global Mean Sea Level (mm): https://climate.nasa.gov/vital-signs/sea-level/ |
| global_temperature | Source: NASA - Combined Land-Surface Air and Sea-Surface Water Temperature Anomalies (Land-Ocean Temperature Index, LOTI) (https://data.giss.nasa.gov/gistemp/) |
| groundwater | U.S. Geological Survey (USGS) Groundwater-Level Annual Statistics for the Nation: https://waterdata.usgs.gov/nwis/annual?referred_module=gw&search_criteria=site_tp_cd&submitted_form=introduction |
| iucn_species_count | Source: IUCN - Red List of Threatened Species (https://www.iucnredlist.org/resources/summary-statistics#Summary%20Tables) |
| ocean_data | Source: NOAA - Global Ocean Data Analysis Project (https://data.nodc.noaa.gov/cgi-bin/iso?id=gov.noaa.nodc:0162565#) |
| plants_assessments | Source: Botanic Gardens Conservation International - Plant Threat Search: https://tools.bgci.org/threat_search.php |
| ppm_month | Source: NOAA - Mauna Loa CO2 records Carbon PPM (https://www.esrl.noaa.gov/gmd/ccgg/trends/data.html) |
| ppm_week | Source: NOAA - Mauna Loa CO2 records Carbon PPM (https://www.esrl.noaa.gov/gmd/ccgg/trends/data.html) |
| sea_ice_extent | Source: NSIDC - Sea Ice Index Data (https://nsidc.org/data/seaice_index/archives) |
| us_gdp | Source: Federal Reserve Bank of St. Louis - US Real Gross Domestic Product by Industry: https://fred.stlouisfed.org/categories/33045 |
| us_renewable_energy | Source: EIA - Renewable Energy Production and Consumption by Source: https://www.eia.gov/totalenergy/data/monthly/ |
| usa_population | Source: Federal Reserve Bank of St. Louis - U.S. Monthly Population (https://fred.stlouisfed.org/series/POPTHM) |
| world_co2_emissions | Source: World Bank - World CO2 emissions (kt): https://data.worldbank.org/indicator/EN.ATM.CO2E.KT?end=2014&start=1960&view=chart |
| world_gdp | Source: World Bank - World GDP (Current US$): https://data.worldbank.org/indicator/NY.GDP.MKTP.CD |
| world_population | Source: World Bank - Arable land (% of land area) (https://data.worldbank.org/indicator/AG.LND.ARBL.ZS?end=2016&start=1961&view=chart) |
SELECT p.date_year,
sum(p.average_ppm) as carbon_ppm_total,
avg(p.average_ppm) as carbon_ppm_mean,
sum(c.energy_consumed) as btu_consumed_total,
avg(c.energy_consumed) as btu_consumed_mean,
sum(e.energy_co2) as co2_emissions_total,
avg(e.energy_co2) as co2_emissions_mean
FROM ppm_month p
JOIN consumption c
ON p.date_year = c.date_year AND p.date_month = c.date_month and c.msn = 'TXRCBUS'
JOIN us_co2_emissions e
ON p.date_year = e.date_year AND p.date_month = e.date_month and e.msn = 'TETCEUS'
GROUP BY p.date_year
ORDER BY p.date_year
| date_year | carbon_ppm_total | carbon_ppm_mean | btu_consumed_total | btu_consumed_mean | co2_emissions_total | co2_emissions_mean | |
|---|---|---|---|---|---|---|---|
| 38 | 2010 | 4678.79 | 389.8992 | 6641.355 | 553.4462 | 5585.741 | 465.4784 |
| 39 | 2011 | 4699.83 | 391.6525 | 6473.666 | 539.4722 | 5446.133 | 453.8444 |
| 40 | 2012 | 4726.24 | 393.8533 | 5684.503 | 473.7086 | 5237.300 | 436.4417 |
| 41 | 2013 | 4758.25 | 396.5208 | 6689.368 | 557.4473 | 5363.018 | 446.9182 |
| 42 | 2014 | 4783.77 | 398.6475 | 7007.139 | 583.9283 | 5411.193 | 450.9327 |
| 43 | 2015 | 4810.01 | 400.8342 | 6465.092 | 538.7577 | 5264.776 | 438.7313 |
| 44 | 2016 | 4850.87 | 404.2392 | 6033.098 | 502.7582 | 5172.402 | 431.0335 |
| 45 | 2017 | 4878.64 | 406.5533 | 6111.580 | 509.2983 | 5130.589 | 427.5491 |
| 46 | 2018 | 4902.26 | 408.5217 | 6896.516 | 574.7097 | 5267.750 | 438.9792 |
| 47 | 2019 | 410.83 | 410.8300 | 1163.200 | 1163.2000 | 493.839 | 493.8390 |
SELECT p.date_year,
p.date_month,
CONCAT(p.date_year, '-', p.date_month, '-01')::date as date_day,
p.average_ppm as "carbon ppm",
c.energy_consumed as "btu consumed",
e.energy_co2 as "co2 emissions"
FROM ppm_month p
JOIN consumption c
ON p.date_year = c.date_year AND p.date_month = c.date_month and c.msn = 'TXRCBUS'
JOIN us_co2_emissions e
ON p.date_year = e.date_year AND p.date_month = e.date_month and e.msn = 'TETCEUS'
ORDER BY p.date_year, p.date_month
metric_ts <- xts(x=metrics_df[c("carbon ppm", "btu consumed", "co2 emissions")],
order.by=metrics_df$date_day)
plot(metric_ts, main = "Energy and Carbon PPM Metrics",
legend.loc="bottomright",
col = c("red", "blue", "darkgreen"),
yaxis.right=FALSE,
axes=FALSE,
lwd=1,
cex.main=3,
major.ticks="years",
major.format="%Y",
minor.format="%Y",
grid.ticks.lty=1)
SELECT energy_type,
date,
SUM(production) AS production,
SUM(consumption) AS consumption
FROM us_renewable_energy
GROUP BY energy_type,
date
ORDER BY energy_type,
date
| energy_type | date | production | consumption | |
|---|---|---|---|---|
| 4678 | Wood Energy | 2019-02-01 | 190.887 | 182.491 |
| 4679 | Wood Energy | 2019-03-01 | 198.621 | 191.507 |
| 4680 | Wood Energy | 2019-04-01 | 195.791 | 187.670 |
| 4681 | Wood Energy | 2019-05-01 | 201.743 | 193.775 |
| 4682 | Wood Energy | 2019-06-01 | 198.379 | 189.036 |
| 4683 | Wood Energy | 2019-07-01 | 205.023 | 196.873 |
par(mfrow=c(3,3), mar=c(5, 5, 2, 1))
output <- by(renewable_df, renewable_df$energy_type, function(sub) {
metric_ts <- xts(x=sub[c("production", "consumption")],
order.by=sub$date)
print(plot(metric_ts, main = sub$energy_type[1],
legend.loc="bottomright",
col = seaborn_palette[c(1,4)],
yaxis.right=FALSE,
axes=FALSE,
lwd=1,
cex.main=3,
major.ticks="years",
major.format="%Y",
minor.format="%Y",
grid.ticks.lty=1))
})
WITH c1 AS
(SELECT CONCAT((date_year/10)::int * 10, 's') as decade,
REPLACE(description, 'Primary Energy Consumed by the ', '') AS "Sector",
SUM(energy_consumed) AS "Primary Energy Consumed"
FROM consumption
WHERE msn IN ('TXICBUS', 'TXRCBUS', 'TXACBUS', 'TXCCBUS', 'TXEIBUS')
GROUP BY CONCAT((date_year/10)::int * 10, 's'),
REPLACE(description, 'Primary Energy Consumed by the ', '')
),
c2 AS
(SELECT CONCAT((date_year/10)::int * 10, 's') as decade,
REPLACE(REPLACE(description, 'Total Energy ', ''), ' CO2 Emissions', '') AS "Sector",
SUM(energy_co2) AS "Total CO2 Emissions"
FROM us_co2_emissions
WHERE msn IN ('TERCEUS', 'TECCEUS', 'TEACEUS', 'TXEIEUS')
GROUP BY CONCAT((date_year/10)::int * 10, 's'),
REPLACE(REPLACE(description, 'Total Energy ', ''), ' CO2 Emissions', '')
)
SELECT c1.decade, c1."Sector", c1."Primary Energy Consumed", c2."Total CO2 Emissions"
FROM c1
LEFT JOIN c2
ON c1."Sector" = c2."Sector" AND c1.decade = c2.decade
ORDER BY c1.decade, c1."Sector"
| decade | Sector | Primary Energy Consumed | Total CO2 Emissions | |
|---|---|---|---|---|
| 35 | 2000s | Transportation Sector | 546616.33 | 38377.90 |
| 36 | 2010s | Commercial Sector | 77171.71 | 17076.89 |
| 37 | 2010s | Electric Power Sector | 693591.66 | 35901.58 |
| 38 | 2010s | Industrial Sector | 388661.21 | NA |
| 39 | 2010s | Residential Sector | 117157.95 | 19437.04 |
| 40 | 2010s | Transportation Sector | 490149.36 | 33393.26 |
plot_mat <- with(consumed_df, tapply(`Primary Energy Consumed`, list(decade, `Sector`), sum))
par(mar=c(5, 5, 2, 1))
barplot(plot_mat, main="U.S. Primary Energy Consumption by Sector", cex.main=1.5,
col=seaborn_palette[1:8], ylim=c(0, 9E5), xaxt="n", yaxt="n", beside=TRUE)
axis(side=1, at=c(5, 14, 23, 32, 41), labels=colnames(plot_mat), padj=0.9, pos=c(0,0))
axis(side=2, at=axTicks(2), labels=format(axTicks(2), big.mark=',', scientific=FALSE), las=1)
legend("top", row.names(plot_mat), fill=seaborn_palette[1:8], ncol=8)
plot_mat <- with(consumed_df, tapply(`Total CO2 Emissions`, list(decade, `Sector`), sum))
plot_mat <- plot_mat[, c("Commercial Sector", "Electric Power Sector",
"Residential Sector", "Transportation Sector"), drop=FALSE]
plot_mat <- plot_mat[complete.cases(plot_mat),,drop=FALSE]
par(mar=c(5, 8, 2, 1))
barplot(plot_mat, main="U.S. CO2 Emissions by Sector", cex.main=1.5,
col=seaborn_palette[1:5], ylim=c(0, 6E4), xaxt="n", yaxt="n", beside=TRUE)
axis(side=1, at=c(3, 9, 15, 21)+0.5, labels=colnames(plot_mat), padj=0.9, pos=c(0,0))
axis(side=2, at=axTicks(2), labels=format(axTicks(2), big.mark=',', scientific=FALSE), las=1)
legend("top", row.names(plot_mat), fill=seaborn_palette[1:5], ncol=5)
sql <- "WITH sub AS
(SELECT CONCAT((date_year/10)::int * 10, 's') as decade,
REPLACE($1, '%', '') as sector,
REPLACE(REPLACE(description,
CONCAT(REPLACE($2, '%', ''), ' CO2 Emissions'),
''),
' ', '\n') AS \"Type\",
energy_co2
FROM us_co2_emissions
WHERE description LIKE $3)
SELECT decade, sector, \"Type\",
SUM(energy_co2) AS \"Total CO2 Emissions\"
FROM sub
GROUP BY decade, sector, \"Type\"
ORDER BY decade, sector, \"Type\"
"
params <- paste(c("%Transportation", "%Residential", "%Commercial"), "Sector%")
emissions_type_df <- do.call(rbind, lapply(params, function(p)
dbGetQuery(conn, sql, param=list(p, p, p))))
output <- by(emissions_type_df, emissions_type_df$sector, function(sub) {
plot_mat <- with(sub, tapply(`Total CO2 Emissions`, list(decade, `Type`), sum))
barplot(plot_mat, main=paste("U.S. CO2 Emissions by", sub$sector[[1]]), cex.main=1.5,
col=seaborn_palette[1:5], ylim=c(0, ceiling(max(plot_mat, na.rm=TRUE)/1E4) * 1E4),
xaxt="n", yaxt="n", beside=TRUE)
axis(side=1, at=c(3,9,15,21,27,33,39,45,51,57,63)[seq_along(colnames(plot_mat))] + 0.5,
labels=colnames(plot_mat), padj=0.9, pos=c(0,0))
axis(side=2, at=axTicks(2), labels=format(axTicks(2), big.mark=',', scientific=FALSE), las=1)
legend("top", row.names(plot_mat), fill=seaborn_palette[1:5], ncol=5)
})
WITH pop AS
(SELECT p.year,
p.population
FROM world_population p
WHERE p.country_name = 'World'
AND p.year BETWEEN 2000 AND 2019
),
land AS
(SELECT a.year,
a.percent_arable
FROM arable_land a
WHERE a.country_name = 'World'
AND a.year BETWEEN 2000 AND 2019
),
fauna AS
(SELECT i.year,
SUM(i.species_count) AS animals_count
FROM iucn_species_count i
WHERE i.year BETWEEN 2000 AND 2019
GROUP BY i.year
),
flora AS
(SELECT p.assessment_year as year,
COUNT(*) AS plants_count
FROM plants_assessments p
WHERE p.assessment_year BETWEEN 2000 AND 2019
AND p.interpreted_status = 'Threatened'
GROUP BY p.assessment_year
),
ice AS
(SELECT s.date_year as year,
AVG(s.extent) FILTER(WHERE s.region = 'Arctic') AS arctic_sea_ice_extent,
AVG(s.extent) FILTER(WHERE s.region = 'Antarctica') AS antarctic_sea_ice_extent
FROM sea_ice_extent s
WHERE s.date_year BETWEEN 2000 AND 2019
GROUP BY s.date_year
),
ocean AS
(SELECT o.year as year,
AVG(o.tco2) AS total_co2,
AVG(o.phts25p0) AS ph_scale
FROM ocean_data o
WHERE o.year BETWEEN 2000 AND 2019
AND o.tco2 <> -9999 AND o.phts25p0 <> -9999
GROUP BY o.year
),
temp AS
(SELECT g.year as year,
AVG(g.global_mean) AS global_mean
FROM global_temperature g
WHERE g.year BETWEEN 2000 AND 2019
GROUP BY g.year
)
SELECT pop.year, pop.population, land.percent_arable,
fauna.animals_count, flora.plants_count,
ice.arctic_sea_ice_extent, ice.antarctic_sea_ice_extent,
ocean.total_co2, ocean.ph_scale, temp.global_mean
FROM pop
JOIN land USING (year)
JOIN fauna USING (year)
JOIN flora USING (year)
JOIN ice USING (year)
JOIN ocean USING (year)
JOIN temp USING (year)
ORDER BY pop.year
| year | population | percent_arable | animals_count | plants_count | arctic_sea_ice_extent | antarctic_sea_ice_extent | total_co2 | ph_scale | global_mean | |
|---|---|---|---|---|---|---|---|---|---|---|
| 7 | 2008 | 6765986891 | 10.80640 | 16928 | 3279 | 10.97785 | 12.23941 | 2181.326 | 7.685694 | 0.5155556 |
| 8 | 2009 | 6849272706 | 10.77478 | 17291 | 2294 | 10.93197 | 12.04859 | 2176.104 | 7.735574 | 0.6294444 |
| 9 | 2010 | 6932596129 | 10.74432 | 18351 | 3217 | 10.71139 | 12.10679 | 2191.072 | 7.702093 | 0.7022222 |
| 10 | 2011 | 7014792135 | 10.80189 | 19570 | 2850 | 10.48350 | 11.50057 | 2224.690 | 7.675097 | 0.5844444 |
| 11 | 2012 | 7099311892 | 10.87358 | 20219 | 7556 | 10.40610 | 12.00444 | 2187.112 | 7.731700 | 0.6183333 |
| 12 | 2013 | 7184861447 | 10.89469 | 21353 | 5465 | 10.89712 | 12.52361 | 2183.400 | 7.723483 | 0.6438889 |
par(mfrow=c(4,2), mai = c(0.3, 0.2, 0.7, 0.2))
for(x in colnames(env_world_df)[3:ncol(env_world_df)]) {
lfit <- loess(paste(x, "~ population"), data=env_world_df)
plot(as.formula(paste(x, "~ population")), env_world_df,
main=paste("population and\n", gsub("_", " ", x)), cex.main=2,
type="p", col=seaborn_palette[1], yaxt='n', xaxt='n', pch=19)
axis(side=1, at=axTicks(1), labels=format(axTicks(1), big.mark=',', scientific=FALSE))
axis(side=2, at=axTicks(2), labels=format(axTicks(2), big.mark=',', scientific=FALSE))
pop_order <- order(env_world_df$population)
lines(env_world_df$population[pop_order], lfit$fitted[pop_order], col=seaborn_palette[4], lwd=3)
}
# LINUX SHELL COMMAND CALL
system(paste0("gnome-terminal -- Rscript -e \"library(shiny); setwd('",
getwd(), "');
runApp('Postgres_As_Data_Science_DB_Shiny_App.R')\""))